This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
library(readr); library(dplyr); library(stringr); library(lubridate)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Attaching package: ‘lubridate’
The following object is masked from ‘package:base’:
date
# Read the library borrowing records from the course's GitHub-hosted CSV.
# Column types are guessed by readr (no `col_types` is supplied).
libraryData <- read_csv("https://raw.githubusercontent.com/tpemartin/github-data/master/library100_102.csv")
Parsed with column specification:
cols(
學號 = col_character(),
書籍類別 = col_character(),
書籍名稱 = col_character(),
讀者借閱冊數 = col_integer(),
書籍出版日期 = col_integer(),
書籍館藏地 = col_character(),
書籍上架年月 = col_date(format = ""),
入學年 = col_integer(),
讀者年級 = col_integer(),
學院 = col_character(),
借閱時間 = col_character()
)
# Convert the categorical columns to factors. The function is passed directly
# to mutate_at() because the funs() wrapper is deprecated in dplyr.
libraryData <- libraryData %>%
  mutate_at(
    vars(學院, 書籍館藏地, 書籍類別),
    as.factor
  )

# Parse the borrowing timestamp (read in as character) into a date-time,
# interpreting it in the Asia/Taipei time zone.
libraryData <- libraryData %>%
  mutate(
    借閱時間 = ymd_hms(借閱時間, tz = "Asia/Taipei")
  )
## 1
# Largest and smallest value of the 讀者借閱冊數 column, formatted as one string.
maxNumber <- max(libraryData$讀者借閱冊數)
minNumber <- min(libraryData$讀者借閱冊數)
paste0("最大值: ", maxNumber, " 最小值: ", minNumber)
[1] "最大值: 613 最小值: 1"
## 2
# Largest and smallest value of the 入學年 column, formatted as one string.
maxNumber <- max(libraryData$入學年)
minNumber <- min(libraryData$入學年)
paste0("最大值: ", maxNumber, " 最小值: ", minNumber)
[1] "最大值: 102 最小值: 100"
## 3
# Latest and earliest value of the 書籍上架年月 column, formatted as one string.
maxNumber <- max(libraryData$書籍上架年月)
minNumber <- min(libraryData$書籍上架年月)
paste0("最大值: ", maxNumber, " 最小值: ", minNumber)
[1] "最大值: 2018-01-12 最小值: 1999-02-14"
#' Describe the maximum and minimum of a vector in one string.
#'
#' @param x A vector accepted by `max()` and `min()`.
#' @return A character string of the form "最大值: <max> 最小值: <min>".
minMaxFun <- function(x) {
  largest <- max(x)
  smallest <- min(x)
  paste0("最大值: ", largest, " 最小值: ", smallest)
}
minMaxFun(libraryData$讀者借閱冊數)
[1] "最大值: 613 最小值: 1"
minMaxFun(libraryData$入學年)
[1] "最大值: 102 最小值: 100"
minMaxFun(libraryData$書籍上架年月)
[1] "最大值: 2018-01-12 最小值: 1999-02-14"
#' Describe the maximum and minimum of a vector in one string.
#'
#' Same contract as `minMaxFun`; the formatted string is the function value.
#'
#' @param x A vector accepted by `max()` and `min()`.
#' @return A character string of the form "最大值: <max> 最小值: <min>".
minMaxFun2 <- function(x) {
  upper <- max(x)
  lower <- min(x)
  paste0("最大值: ", upper, " 最小值: ", lower)
}
#' Test whether an object is NOT numeric.
#'
#' @param x Any R object.
#' @return `TRUE` when `is.numeric(x)` is `FALSE`, otherwise `FALSE`.
not_numeric <- function(x) {
  !is.numeric(x)
}
not_numeric(libraryData$學號)
[1] TRUE
# Find the subsample whose 學院 is 人文學院.
logIndex <- libraryData$學院 == "人文學院"
subsample <- libraryData[logIndex, ]
# Repeat the same two steps for 社會科學院; both objects are overwritten.
logIndex <- libraryData$學院 == "社會科學院"
subsample <- libraryData[logIndex, ]
#' Return the rows that belong to one college.
#'
#' @param x College name to match against the `學院` column.
#' @param data Data frame with a `學院` column. Defaults to the global
#'   `libraryData`, so existing one-argument calls keep working.
#' @return The rows of `data` whose `學院` equals `x` (zero rows when nothing
#'   matches).
findMyCollege <- function(x, data = libraryData) {
  # The previous version stored the comparison in `ligIndex` but then indexed
  # with the unrelated global `logIndex`, so every call returned the same rows
  # regardless of `x`. Build and use one local index instead.
  # which() drops NA comparisons so rows with a missing 學院 are not returned
  # as all-NA rows.
  rowIndex <- which(data$學院 == x)
  # drop = FALSE keeps a data frame even when `data` has a single column.
  data[rowIndex, , drop = FALSE]
}
findMyCollege("人文學院")-> sample1
nrow(sample1)
[1] 15588
findMyCollege("社會科學院")-> sample2
nrow(sample2)
[1] 15588
# One-row summary: mean of 讀者借閱冊數 and the smallest / largest
# 書籍出版日期 (missing values ignored for the latter two).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
libraryData %>%
  select(讀者借閱冊數, 書籍出版日期) %>%
  summarise(
    讀者平均借閱冊數 = mean(讀者借閱冊數),
    最小書籍出版日期 = min(書籍出版日期, na.rm = TRUE),
    最大書籍出版日期 = max(書籍出版日期, na.rm = TRUE)
  )
library(summarytools)
# Keep only the numeric columns, build a summarytools data-frame summary of
# them, and hand that summary to summarytools::view() for display.
libraryData %>%
select_if(
is.numeric
) %>% summarytools::dfSummary() %>% summarytools::view(.)
# Flag every record whose 書籍出版日期 equals 9999 ...
logiIndex <- libraryData$書籍出版日期 == 9999
# ... and overwrite those flagged entries with NA.
libraryData$書籍出版日期[logiIndex] <- NA
# Distinct storage locations, taken as the factor levels of 書籍館藏地.
storageLocations <- levels(as.factor(libraryData$書籍館藏地))
storageLocations
[1] "1F寰宇開卷專區" "1F教師成長專區" "1F流通櫃檯"
[4] "1F視聽區" "1F特展區" "1F通識悅讀專區"
[7] "1F職涯博覽專區" "1F職涯博覽專區(創新創業)" "1F指定參考書區"
[10] "2F藝文展覽區" "3F中文圖書區" "4F日韓文圖書區"
[13] "4F外文圖書區" "5F本校特藏" "5F參考資料區"
[16] "6F視聽閉架區" "6F套書區" "民生校區流通櫃檯"
# Walk through the counting steps by hand for the first storage location.
i <- 1
# 1: pick the i-th storage location
storageLocation_i <- storageLocations[i]
# 2: row positions whose 書籍館藏地 matches it, then the matching rows
numericalIndexForSubsample <- which(libraryData$書籍館藏地 == storageLocation_i)
subsample_i <- libraryData[numericalIndexForSubsample, ]
# 3: number of rows in that subsample
borrowCount_i <- nrow(subsample_i)
# 0: preallocate one slot per storage location (sized from the data instead of
#    a hard-coded 18, so it stays correct if the set of locations changes)
allBorrowCount <- vector("numeric", length(storageLocations))
i <- 1
# 1: pick the i-th storage location
storageLocation_i <- storageLocations[i]
# 2: row positions whose 書籍館藏地 matches it, then the matching rows
numericalIndexForSubsample <- which(libraryData$書籍館藏地 == storageLocation_i)
subsample_i <- libraryData[numericalIndexForSubsample, ]
# 3: number of rows in that subsample
borrowCount_i <- nrow(subsample_i)
# 4: store the count in the slot for location i
allBorrowCount[[i]] <- borrowCount_i
# 0: preallocate one slot per storage location
allBorrowCount <- vector("numeric", length(storageLocations))
# The index follows storageLocations, so iterate with seq_along() rather than
# the hard-coded c(1:18); this also behaves correctly for an empty vector.
for (i in seq_along(storageLocations)) {
  # 1: storage location handled in this iteration
  storageLocation_i <- storageLocations[i]
  # 2: row positions whose 書籍館藏地 matches it, then the matching rows
  numericalIndexForSubsample <- which(libraryData$書籍館藏地 == storageLocation_i)
  subsample_i <- libraryData[numericalIndexForSubsample, ]
  # 3: number of rows in that subsample
  borrowCount_i <- nrow(subsample_i)
  # 4: store the count in the slot for location i
  allBorrowCount[[i]] <- borrowCount_i
}
# Distinct colleges, taken as the factor levels of 學院.
college <- levels(as.factor(libraryData$學院))
# 0: preallocate one slot per college
collegeBorrowCount <- vector("numeric", length(college))
# seq_along() is used instead of 1:length(college), which would iterate over
# c(1, 0) if `college` were empty.
for (i in seq_along(college)) {
  # 1: college handled in this iteration
  college_i <- college[i]
  # 2: row positions whose 學院 matches it, then the matching rows
  numericalIndexForSubsample <- which(libraryData$學院 == college_i)
  collegesubsample_i <- libraryData[numericalIndexForSubsample, ]
  # 3: number of rows in that subsample
  collegeBorrowCount_i <- nrow(collegesubsample_i)
  # 4: store the count in the slot for college i
  collegeBorrowCount[[i]] <- collegeBorrowCount_i
}
# `result` was printed here without ever being assigned, which raised
# "object 'result' not found". Assign it before printing.
# NOTE(review): the intended definition is not visible in this file; a
# per-college row count is assumed here -- confirm.
result <- libraryData %>%
  group_by(學院) %>%
  summarise(borrowCount = n())
result
# Count the rows for every combination of 學院 and 入學年, then print the table.
result2 <- libraryData %>%
  group_by(學院, 入學年) %>%
  summarise(borrowCount = n())
result2
# Select the observations whose 入學年 is between 100 and 102 (inclusive, per dplyr::between) and whose 學院 is 社會科學院
libraryData %>%
filter(between(入學年,100,102), 學院=="社會科學院")
# Equivalent: the same condition with 入學年 piped into between()
libraryData %>%
filter(入學年 %>% between(100,102), 學院=="社會科學院")
# An atomic vector: the integers 1 through 10.
a <- 1:10
typeof(a)
[1] "integer"
# A list can hold elements of different types and lengths.
b <- list(
  "a",
  "b",
  1:10
)
typeof(b)
[1] "list"
# A list may itself contain a list, which here holds a string and a data frame.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
b2 <- list(
  "a",
  "b",
  1:10,
  list(
    "Mr Smith",
    data.frame(
      v1 = c(1, 2),
      v2 = c("aa", "bb"),
      stringsAsFactors = FALSE
    )
  )
)
library(readr)
# Read the sample of film-contest voting results.
filmVotingData <- read_csv("https://raw.githubusercontent.com/tpemartin/course-107-1-programming-for-data-science/master/data/%E7%AC%AC%E4%B8%89%E5%B1%86%E7%B6%93%E6%BF%9F%E6%92%AD%E5%AE%A2%E7%AB%B6%E8%B3%BD%E5%8F%83%E8%B3%BD%E4%BD%9C%E5%93%81%E6%8A%95%E7%A5%A8%E7%B5%90%E6%9E%9C%E6%A8%A3%E6%9C%AC%20-%20Sheet1.csv")
# Split each answer on ", " unless the separator is immediately followed by
# "by" (negative lookahead), giving one character vector per response.
filmVotingResult <- str_split(
  filmVotingData$請選擇你最喜歡的影片二部,
  ", (?!(by))"
)
# filmVotingResult
# A named list with three elements of different types.
plasticContainer <- list(
  # location 1
  greenBox = c("paper", "scissor"),
  # location 2 (mixing 1L, 3L with 2.5 makes the whole vector double)
  redBox = c(1L, 3L, 2.5),
  # location 3
  blueBox = c(TRUE, FALSE, TRUE, TRUE)
)
# Single-bracket subsetting returns a list; the same two elements could be
# selected by position with plasticContainer[c(1,2)].
plasticContainer[c("greenBox", "redBox")]
$greenBox
[1] "paper" "scissor"
$redBox
[1] 1.0 3.0 2.5
plasticContainer[c("greenBox")] # or plasticContainer[1]; the result is still a list
$greenBox
[1] "paper" "scissor"
# Install plotly only when it is missing, so re-running the notebook does not
# download and rebuild the package every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
There is a binary version available but the source version is later:
installing the source package ‘plotly’
嘗試 URL 'https://cran.rstudio.com/src/contrib/plotly_4.8.0.tar.gz'
Content type 'application/x-gzip' length 1860673 bytes (1.8 MB)
==================================================
downloaded 1.8 MB
* installing *source* package ‘plotly’ ...
** package ‘plotly’ successfully unpacked and MD5 sums checked
** R
** data
*** moving datasets to lazyload DB
** demo
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
*** copying figures
** building package indices
** testing if installed package can be loaded
* DONE (plotly)
The downloaded source packages are in
‘/private/var/folders/fq/xqn064jn35zdwyttl_kn79hh0000gn/T/Rtmpf51rCa/downloaded_packages’
# Download the saved R data file. `mode = "wb"` forces a binary transfer; the
# default text mode can corrupt binary files on Windows, and the URL ends in
# "?raw=true", so the file type cannot be inferred from its extension.
download.file(
  "https://github.com/tpemartin/github-data/blob/master/plotly_4070_neda.Rda?raw=true",
  destfile = "plotly_4070_neda.Rda",
  mode = "wb"
)
嘗試 URL 'https://github.com/tpemartin/github-data/blob/master/plotly_4070_neda.Rda?raw=true'
Content type 'application/octet-stream' length 12400 bytes (12 KB)
==================================================
downloaded 12 KB
# Restore the object(s) saved in the downloaded file into the workspace.
# NOTE(review): presumably this is what defines `plotly_4070_neda` used below -- confirm.
load("plotly_4070_neda.Rda")
library(plotly)
Loading required package: ggplot2
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# Display the object as loaded.
plotly_4070_neda
# Set the opacity of the first shape in the layout to 0.8 by editing the
# nested list directly, then display the object again.
plotly_4070_neda$x$layout$shapes[[1]]$opacity<-0.8
plotly_4070_neda
library(readr)
# Read the transcript records, parsing 學期成績 as a number; the remaining
# column types are guessed by readr.
transcriptData <- read_csv(
  "https://raw.githubusercontent.com/tpemartin/github-data/master/transcript100_102.csv",
  col_types = cols(學期成績 = col_number())
)
#' Total attempted and earned credits for one student.
#'
#' @param x Student id to match against the `學號` column.
#' @param data Transcript data frame with columns `學號`, `學期成績` and
#'   `學分數`. Defaults to the global `transcriptData`, so existing
#'   one-argument calls keep working.
#' @return A one-row data frame with `學號`, `總修習學分數` (sum of all
#'   credits) and `總實得學分數` (sum of credits for courses with a grade of
#'   at least 60).
totalCredits <- function(x, data = transcriptData) {
  # which() drops NA comparisons so rows with a missing 學號 are not selected.
  subsample <- data[which(data$學號 == x), , drop = FALSE]
  subsample %>%
    mutate(
      # A missing grade counts as "not passed". Previously it produced an NA
      # in the logical index, which turned the earned-credit total into NA.
      及格 = !is.na(學期成績) & 學期成績 >= 60
    ) %>%
    summarise(
      學號 = 學號[1],
      總修習學分數 = sum(學分數),
      總實得學分數 = sum(學分數[及格])
    )
}
library(stringr)
# Keep the rows with 學屆 equal to 100 whose 學號 has "P1" at characters 5-6.
subsample <- filter(
  transcriptData,
  學屆 == 100,
  str_sub(學號, 5, 6) == "P1"
)
# Distinct student ids among those rows.
符合條件學號 <- unique(subsample$學號)
# Compute the credit totals for each id, one list element per student.
creditResults <- vector("list", length(符合條件學號))
for (i in seq_along(符合條件學號)) {
  creditResults[[i]] <- totalCredits(符合條件學號[i])
}
library(readr)
library(tidyr)
# Read a second library data set from libraryData2.csv.
# NOTE(review): this rebinds `libraryData`, replacing the data frame that was
# loaded and modified earlier in the notebook.
libraryData <- read_csv("https://raw.githubusercontent.com/tpemartin/github-data/master/libraryData2.csv")
Parsed with column specification:
cols(
學號 = col_character(),
書籍類別 = col_character(),
書籍名稱 = col_character(),
書籍出版日期 = col_integer(),
書籍館藏地 = col_character(),
書籍上架年月 = col_date(format = ""),
入學年 = col_integer(),
讀者年級 = col_integer(),
學院 = col_character(),
借閱時間 = col_datetime(format = ""),
學期 = col_integer(),
學年 = col_integer(),
學系 = col_character()
)
# Nest the records by 學院: one row per college, with that college's remaining
# columns collected in the list-column 各院借閱資料.
libraryDataNested <- libraryData %>%
  group_by(學院) %>%
  nest(.key = "各院借閱資料")
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.